import scanpy as sc, anndata as ad, numpy as np, pandas as pd
from scipy import sparse
from anndata import AnnData
from matplotlib import pylab
import warnings
import socket
import yaml
import matplotlib as plt
import scanpy.external as sce
import os
warnings.filterwarnings('ignore')
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
with open("../data/resources/rcParams.yaml") as f:
rcParamsDict = yaml.full_load(f)
for k in rcParamsDict["rcParams"]:
print("{} {}".format(k,rcParamsDict["rcParams"][k]))
plt.rcParams[k] = rcParamsDict["rcParams"][k]
for k1 in set(list(rcParamsDict)).difference(set(["rcParams"])):
print("{} {}".format(k1,rcParamsDict[k1]))
scanpy==1.8.0 anndata==0.8.0 umap==0.4.6 numpy==1.22.2 scipy==1.6.2 pandas==1.2.3 scikit-learn==0.24.1 statsmodels==0.13.5 python-igraph==0.9.1 louvain==0.7.0 leidenalg==0.8.3 figure.dpi 80 savefig.dpi 500 figure.figsize [10, 10] axes.facecolor white dotSize 20
outdir = "../data/output"
with open("../data/resources/iPSC_lines_map.yaml", 'r') as f:
iPSC_lines_map = yaml.load(f, Loader=yaml.FullLoader)["lines"]
Multiplexing = sc.read_h5ad(outdir+"/adatas/MultiplexingPreprocessing_JoinScale.h5ad")
sc.tl.pca(Multiplexing, svd_solver='arpack')
computing PCA
on highly variable genes
with n_comps=50
finished (0:00:08)
sc.pl.embedding(Multiplexing, "X_pca" ,color=["cellID_newName","dataset","stage"],ncols=3, size = 10, wspace=.4, legend_fontsize = "xx-large")
#sce.pp.harmony_integrate(Multiplexing, 'dataset', theta = 0, sigma= 0.1)
sce.pp.harmony_integrate(Multiplexing, 'dataset', adjusted_basis="X_pca", max_iter_harmony=20)
#'X_pca_harmony' in Multiplexing.obsm
2023-07-23 18:06:19,119 - harmonypy - INFO - Iteration 1 of 20 2023-07-23 18:06:29,097 - harmonypy - INFO - Iteration 2 of 20 2023-07-23 18:06:39,241 - harmonypy - INFO - Converged after 2 iterations
sc.pl.embedding(Multiplexing, "X_pca" ,color=['MKI67', 'S100B',"NES","PAX6","DCX",'STMN2',"DCN", "BGN"])
sc.pl.embedding(Multiplexing, "X_pca" ,color=["cellID_newName","dataset","stage"],ncols=3, size = 10, legend_fontsize = "xx-large")
test = Multiplexing
test.obsm["X_pca"] = test.obsm["X_pca"]
sc.pl.pca_variance_ratio(test, log=True)
del test
sc.pp.neighbors(Multiplexing,use_rep="X_pca",n_neighbors=100, n_pcs=15)
computing neighbors
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:49)
sc.tl.leiden(Multiplexing, resolution=5, key_added="leiden_5")
running Leiden clustering
finished: found 50 clusters and added
'leiden_5', the cluster labels (adata.obs, categorical) (0:00:33)
sc.tl.umap(Multiplexing)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:34)
genes = pd.array(["MKI67","CDC20","PAX6","NES","HOPX","DCX","STMN2","NEUROD6","DCN", "BGN","NEUROD2","MAP2","SYN1","SLC17A6", "SATB2","BCL11B","TBR1", "GAD1","GAD2","DLX2","DLX5","DLX6-AS1","SLC32A1","S100B", "GFAP", "AQP4", "OLIG1", "GAPDH"], dtype="category")
sc.pl.umap(Multiplexing,color= genes[genes.isin(Multiplexing.var_names) ],size = 10, ncols=3, vmin='p1.5',vmax='p98.5' )